import HTMLParser
import urllib
import re
import string


# --- configuration ---------------------------------------------------------
# Page that is downloaded and scanned for AS numbers.
urlString = "http://www.nl-ix.net/members/full-memberlist/"
# File the collected AS numbers are written to.
pathOut = "ixp_members.txt"

# --- shared state ----------------------------------------------------------
# Accumulator filled by the HTML parser: unique AS numbers, stored as digit
# strings, in the order they are first encountered.
urlText = []


class Parse40(HTMLParser.HTMLParser):
    """Collect AS numbers found in right-aligned table cells.

    A cell qualifies when it is opened with ``<td align="right">`` and the
    first text delivered after that tag is exactly ``AS`` followed by
    digits.  Only the digits are stored, each value at most once, in the
    order of first appearance.

    Args:
        results: Optional list that receives the numbers.  When omitted
            (the default), numbers are appended to the module-level
            ``urlText`` list.
    """

    # True while the most recently opened <td> was right-aligned and no text
    # has been delivered since.
    check = False

    # Destination list; None means "use the module-level urlText".
    results = None

    # The whole text chunk must be "AS<digits>"; group 1 is the bare number.
    _as_number = re.compile(r"^AS(\d+)$")

    def __init__(self, results=None):
        # Explicit base-class call instead of super(): the Python 2
        # HTMLParser base is an old-style class.
        HTMLParser.HTMLParser.__init__(self)
        self.results = results

    def handle_starttag(self, tag, attrs):
        """Arm the flag on ``<td align="right">``; disarm on any other ``<td>``."""
        if tag == "td":
            # Assigning (rather than only ever setting True) keeps a flag
            # left over from an earlier empty cell from leaking into this one.
            self.check = any(
                name == "align" and value == "right" for name, value in attrs
            )

    def handle_endtag(self, tag):
        """Disarm the flag when a cell closes without having produced text."""
        if tag == "td":
            self.check = False

    def handle_data(self, data):
        """Record ``data`` if the flag is armed and it looks like ``AS<digits>``."""
        armed = self.check
        # Only the first text chunk after the opening tag is considered.
        self.check = False
        if not armed:
            return

        found = self._as_number.match(data)
        if found is None:
            return

        number = found.group(1)
        # The global is looked up here, not in __init__, so a default-built
        # parser always targets whatever urlText currently refers to.
        sink = urlText if self.results is None else self.results
        if number not in sink:
            sink.append(number)


lparser = Parse40()

# Download the whole page; the response is closed even if the read fails.
response = urllib.urlopen(urlString)
try:
    page = response.read()
finally:
    response.close()

# Strip the malformed "<!- RSS Autodiscovery ->" marker before parsing:
# per the original author's note, the parser does not work if it is left in.
page = page.replace("<!- RSS Autodiscovery ->", "")
lparser.feed(page)

# Append mode: lines already present in the output file are kept.
# The "with" block closes the file even if a write raises.
with open(pathOut, "a") as fileOut:
    for item in urlText:
        # One line per AS number, prefixed with "40" (presumably the
        # identifier of this exchange in the output format -- TODO confirm).
        fileOut.write("40 %s\n" % item)